import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
%matplotlib inline
A library built on top of pandas and some other libraries to work with geospatial data
import geopandas as gpd
The area is North Delhi
# Read the GeoJSON file into a GeoDataFrame
df = gpd.read_file("rohini.geojson")
# Preview the first few rows
df.head()
# (number of rows, number of columns)
df.shape
Extracting the Longitude and Latitude coordinates from the 'geometry' column
# Split the geometry column into separate coordinate columns:
# x is stored as the longitude and y as the latitude.
points = df['geometry']
df['Long'], df['Lat'] = points.x, points.y
Converting the Coordinates into an array of geo pairs
# Build an (n_points, 2) array holding one [lat, long] pair per row.
# Nesting the two columns inside list literals would instead produce a
# (1, 2, n_points) array, which is not an array of pairs.
coordinates = df[['Lat', 'Long']].to_numpy()
import folium
from folium import plugins
from folium.plugins import MarkerCluster
# Create the base folium map, centred on the North Delhi area
m = folium.Map(location=[28.67304, 77.19767], zoom_start=12)
m
# Draw every data point on the map as a circular marker, using the
# point's name as the popup text.
for lat, lon, name in zip(df['Lat'], df['Long'], df['name']):
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=name,
        fill_color="#3db7e4",  # divvy color
    ).add_to(m)
# Display the map with the data points plotted
m
# Add a heatmap layer to the folium map to show the density of the data
# points. The heatmap input is one [lat, long] entry per point, built
# directly from the dataframe's coordinate columns.
heat_points = df[['Lat', 'Long']].values.tolist()
m.add_child(plugins.HeatMap(heat_points, radius=13))
m
Go ahead and play with the map to check out which shops are in the high-density markets
# Pair up the coordinates as a list of (lat, long) tuples
locations = list(zip(df['Lat'], df['Long']))
# One shop icon per data point
icons = [folium.Icon(prefix="fa", icon="shop") for _ in locations]
# Group the markers into clusters and add them to the map
cluster = MarkerCluster(locations=locations, icons=icons)
m.add_child(cluster)
m
from sklearn.cluster import DBSCAN
import sklearn.utils
from sklearn.preprocessing import StandardScaler
# Standardise the coordinate columns before fitting
pairs = StandardScaler().fit_transform(df[['Lat', 'Long']])
# Density-based clustering of the scaled coordinates
db = DBSCAN(eps=0.3, min_samples=7)
db.fit(pairs)
labels = db.labels_
print(labels[500:560])
# Store each row's cluster label in a new "Market" column
df["Market"] = labels
# Count the distinct labels, with and without the -1 label that DBSCAN
# assigns to noise points (points that belong to no cluster)
unique_labels = set(labels)
clusterNum = len(unique_labels)
realClusterNum = clusterNum - (1 if -1 in labels else 0)
unique_labels
# Rows are now categorised into markets through the new Market column
df.head()